## Table J.2: PanelMatch Coefficient Estimates for Senate, Matching on 
# Politician and NAICS 3-Digit Industry: ---------------------------------------

## Instructions ----------------------------------------------------------------
# Step 1: Adjust DATA_DIR to where data is is located
# Step 2: Adjust OUT_DIR to where output folder is
# Step 3: Run entire script
## setup -----------------------------------------------------------------------


# R Code to be run for PanelMatch Results. Ran on High-Performance Cluster.
# uncomment to run.

# LOAD PACKAGES
library(PanelMatch)
library(dplyr)
options(scipen=999)

##### UNCOMMENT FROM HERE 
# ### SUBSETTING AND PATH
# ### 1. RUN ON SENATE ####
# naics_sub <- c("31", "32", "33") #### <- set industries to be analyzed
# chamber_sub <- c("senate") #### <- set chamber to be analyzed
# path <- "../data/don_lob_all_ind_20210203.rds" ### set path to be analyzes
# outcome_select <- "lob_any_bin"
# treatment_select <- "don_any_bin"
# 
# #### SET ANALYSIS PARAMETERS ######
# ## refinement_method <- "CBPS.weight" # set refinement
# refinement_method <- "mahalanobis" # set refinement
# size_match <- 10 # size of matched set
# lags_match_dv <- "1:3" # DV lags to match on
# covariates_match <- "l_sale + l_emp" # other covariates to match on
# ## exact_match <- c("naics3", "govtrack_id") # exact matchin
# exact_match <- c("govtrack_id", "naics3") # ****exact matching. HERE ADDED NAICS 3 DIGIT****
# treatments <- c("don_any_bin", "don_pac_bin", "don_ceo_bin", "don_gov_bin", "don_emp_bin") # treatment variable
# treatname <- c("AnyDon", "PacDon", "CeoDon", "GovDon", "EmpDon") # treatment variable
# outcomes <- c("lob_spon_bin", "lob_cosp_only_bin", "lob_comm_bin", "lob_any_bin")
# outcome_names <- c("SpLob", "CoSpLob", "CommLob", "AnyLob")
# chamber_names <- c("Senate", "House")
# 
# 
# #### CREATE A DF WITH ANALYSIS PAREMTERS ######
# specs_df <- data.frame(chamber = rep(rep(c("senate", "house"), each = 4), 5),
#                        chamber_names = rep(rep(chamber_names, each = 4), 5),
#                        treatments = rep(treatments, each = 8),
#                        treatname = rep(treatname, each = 8),
#                        outcomes = rep(outcomes, 10),
#                        outcome_names = rep(outcome_names, 10),
#                        refinement_method = rep(refinement_method, 40),
#                        size_match = rep(size_match, 40),
#                        exact_match1 = rep(exact_match[1], 40),
#                        exact_match2 = rep(exact_match[2], 40), # uncommented here for 2-digit NAICS matching, in addition to politician matching
#                        covariate_match = rep(covariates_match, 40),
#                        lags_match_dv = rep(lags_match_dv, 40),
#                        stringsAsFactors = FALSE)
# 
# specs_df <- specs_df %>% filter(chamber %in% chamber_sub & outcomes == outcome_select & treatments == treatment_select) # subset by chosen chamber
# 
# 
# 
# 
# # LOAD DATA, SUBSET, and keep only variables needed
# don_lob <- readRDS(file_in) %>%
#   filter(naics2 %in% naics_sub, chamber == chamber_sub) %>%
#   mutate(l_sale = log(sale+1), l_emp = log(emp+1)) %>%
#   select(gvkey_govtrack_id, gvkey, govtrack_id, year, naics, naics3, naics2, chamber, # IDs
#          lob_any_bin, lob_spon_bin, lob_cosp_only_bin, lob_comm_bin, # outcomes
#          don_any_bin, don_pac_bin, don_emp_bin, don_ceo_bin, don_gov_bin, # outcomes, reverse analysis
#          l_sale, l_emp) %>% # some recodings necessary
#   mutate(gvkey_govtrack_id_numeric = as.integer(as.factor(gvkey_govtrack_id)),
#          year = as.integer(year)) #
# 
# 
# # need to set as DF, if not PM breaks sometimes
# don_lob <- as.data.frame(don_lob)
# 
# # subset to smaller sample for testing
# # set.seed(1234)
# # gvkey_sub <- sample(unique(don_lob$gvkey), 100)
# # don_lob_sub <- don_lob %>% filter(gvkey %in% gvkey_sub)
# # i <- 1
# 
# # print number of rows
# print(paste0("using dataset with ", nrow(don_lob), " rows."))
# 
# 
# 
# # start loop across all outcomes
# for (i in 1:dim(specs_df)[1]) {
#   
#   # set up the respective outcome names for plot titles
#   outcome_name <- specs_df$outcome_names[i] # change with outcome used
#   chamber_name <- specs_df$chamber_names[i] #
#   treatment_name <- specs_df$treatname[i]
#   
#   
#   
#   covs_formula <- paste0("~ I(lag(", specs_df$outcomes[i], ", ", lags_match_dv, ")) + ", specs_df$covariate_match[i])
#   
#   
#   
#   ##### Run Panelmatch
#   Matches.lobby <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                               treatment = specs_df$treatments[i], match.missing = TRUE,
#                               outcome.var = specs_df$outcomes[i], qoi = "att",
#                               covs.formula = as.formula(covs_formula),
#                               exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]), # uncommented for matching on both govtrack ID and 2-digit NAICS2
#                               ## exact.match.variables = c(specs_df$exact_match1[i]),
#                               forbid.treatment.reversal = FALSE,
#                               restrict.control.period = 3,
#                               size.match = specs_df$size_match[i],
#                               data = don_lob,
#                               refinement.method = specs_df$refinement_method[i])
#   
#   # Determine output name based on specficiation
#   output_name <- paste0("manuf_ann_", specs_df$chamber[i], "_maha_",
#                         specs_df$treatname[i], "_" , specs_df$outcome_names[i], "_jop_naics3")
#   
#   # Save PanelMatch Object
#   save(Matches.lobby, file = paste0("../output/MSets_", output_name))
#   print(paste0(i,". ", "saved **Matched Sets** object to ", "../output/MSets_", output_name))
#   gc()
#   
#   
#   # Run PanelEstimate
#   PM.lobby <- PanelEstimate(sets = Matches.lobby, data = don_lob, se.method = "bootstrap")
#   
#   # Save PanelEstimate Object
#   save(PM.lobby, file = paste0("../output/EstObj_", output_name))
#   print(paste0(i,". ", "saved **Panel Estimate** object to ", "../output/EstObj_", output_name))
#   gc()
#   
#   
# }
# 
# gc()
##### UNCOMMENT TO HERE 



### 3. Plot Results/Table ####
# number of manufacturing industries
# length(unique(don_lob$naics3))
# sort(table(don_lob$naics3))
# naics3_firms <- don_lob %>%
#   group_by(naics3) %>%
#   summarise(n = length(unique(gvkey)))
# mean(naics3_firms$n) # mean number of firms
# median(naics3_firms$n) # median number of firms


DATA_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/data/figJ2"
OUT_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/output"    

# senate results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_AnyDon_AnyLob_jop_naics3"))
load(file.path(DATA_DIR, "MSets_manuf_ann_senate_maha_AnyDon_AnyLob_jop_naics3"))


# create table
library(kableExtra)
n_row <- 2
tab <- data.frame(Chamber = rep(NA, n_row),
                  `t+0` = rep(NA, n_row),
                  `t+1` = rep(NA, n_row),
                  `t+2` = rep(NA, n_row),
                  `t+3` = rep(NA, n_row))

tab$Chamber[1] <- "\\textit{Senate}"
tab$t.0[1] <- round(PM.lobby$estimates[1], 3)
tab$t.0[2] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[1] <- round(PM.lobby$estimates[2], 3)
tab$t.1[2] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[1] <- round(PM.lobby$estimates[3], 3)
tab$t.2[2] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[1] <- round(PM.lobby$estimates[4], 3)
tab$t.3[2] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")") 

colnames(tab) <- c("Chamber", "\\textit{t+0}", "\\textit{t+1}", "\\textit{t+2}", "\\textit{t+3}")

options(knitr.kable.NA = "") 
tab %>%
  kable(format = "latex", escape = FALSE, align = "lcccc", 
        caption = "\\textbf{PanelMatch Coefficient Estimates for Senate, Matching on Politician and NAICS 3-Digit Industry}: This table reports the estimated coeficients
and associated standard errors, computed based on a block-bootstrap procedure, which are used
to produce Senate results as in Figure 4, matching on both  politician and NAICS 3-digit industry. The number of observations is  3,058,224 dyads in the Senate. To compute the Senate results, we
identified all dyads where a particular company donated to a particular politician for the first
time in at least three years. For the Senate, there are 3,778 such instances", 
        label = "tab_j2") %>%
  add_header_above(c(" " = 1, "Estimated Effect of Donation on Probability to Lobby in:" = 4), line = FALSE) %>%
  add_header_above(c(" " = 1, "Dependent Variable:" = 4), italic = TRUE)  %>%
  footnote(general = "Std. errors in parentheses",footnote_as_chunk = TRUE) %>%
  kable_styling(full_width = TRUE) %>%
  save_kable(file.path(OUT_DIR, "table_j2.tex")) # panelmatch_senate_naics3.tex


# clean up
gc()
rm(list = ls())